Below is a link to the code for all of the chapters: Code for the 12 chapters.
And here is the link provided to see what each chapter covers: Book.
Here is the information for each chapter:
What Chapter 1 covers: The main goal of this code is to build a Bayesian Search and Rescue game using Python and OpenCV. The game centers on finding a missing sailor in one of three search areas predefined on a map. Each search area is defined by its corner-point pixels, and the sailor's actual location is randomly assigned within one of the areas during initialization.
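Before the full listing, it helps to see the Bayes-rule update the game applies after each failed search in isolation. The sketch below is not from the book; the priors and effectiveness values are illustrative, but the formula matches the revise_target_probs() method in the listing:

# Minimal sketch of the Bayes update used in revise_target_probs().
# Priors and search effectiveness (E) values are illustrative.
p = [0.2, 0.5, 0.3]   # Prior probability the sailor is in each area.
e = [0.4, 0.9, 0.0]   # Fraction of each area searched without success.

# P(area | not found) is proportional to P(area) * P(not found | area).
unnormalized = [pi * (1 - ei) for pi, ei in zip(p, e)]
denom = sum(unnormalized)
posterior = [u / denom for u in unnormalized]
print(posterior)  # Area 2 drops sharply after a thorough, failed search.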
The code in Chapter 1:
import sys
import random
import itertools
import numpy as np
import cv2 as cv

MAP_FILE = 'cape_python.png'

# Assign search area (SA) corner point locations based on image pixels.
SA1_CORNERS = (130, 265, 180, 315)  # (UL-X, UL-Y, LR-X, LR-Y)
SA2_CORNERS = (80, 255, 130, 305)  # (UL-X, UL-Y, LR-X, LR-Y)
SA3_CORNERS = (105, 205, 155, 255)  # (UL-X, UL-Y, LR-X, LR-Y)


class Search():
    """Bayesian Search & Rescue game with 3 search areas."""

    def __init__(self, name):
        self.name = name
        self.img = cv.imread(MAP_FILE, cv.IMREAD_COLOR)
        if self.img is None:
            print('Could not load map file {}'.format(MAP_FILE),
                  file=sys.stderr)
            sys.exit(1)

        # Set placeholders for sailor's actual location.
        self.area_actual = 0
        self.sailor_actual = [0, 0]  # As "local" coords within search area

        # Create numpy arrays for each search area by indexing image array.
        self.sa1 = self.img[SA1_CORNERS[1] : SA1_CORNERS[3],
                            SA1_CORNERS[0] : SA1_CORNERS[2]]
        self.sa2 = self.img[SA2_CORNERS[1] : SA2_CORNERS[3],
                            SA2_CORNERS[0] : SA2_CORNERS[2]]
        self.sa3 = self.img[SA3_CORNERS[1] : SA3_CORNERS[3],
                            SA3_CORNERS[0] : SA3_CORNERS[2]]

        # Set initial per-area target probabilities for finding sailor.
        self.p1 = 0.2
        self.p2 = 0.5
        self.p3 = 0.3

        # Initialize search effectiveness probabilities.
        self.sep1 = 0
        self.sep2 = 0
        self.sep3 = 0

    def draw_map(self, last_known):
        """Display basemap with scale, last known xy location, search areas."""
        # Draw the scale bar.
        cv.line(self.img, (20, 370), (70, 370), (0, 0, 0), 2)
        cv.putText(self.img, '0', (8, 370), cv.FONT_HERSHEY_PLAIN, 1, (0, 0, 0))
        cv.putText(self.img, '50 Nautical Miles', (71, 370),
                   cv.FONT_HERSHEY_PLAIN, 1, (0, 0, 0))

        # Draw and number the search areas.
        cv.rectangle(self.img, (SA1_CORNERS[0], SA1_CORNERS[1]),
                     (SA1_CORNERS[2], SA1_CORNERS[3]), (0, 0, 0), 1)
        cv.putText(self.img, '1',
                   (SA1_CORNERS[0] + 3, SA1_CORNERS[1] + 15),
                   cv.FONT_HERSHEY_PLAIN, 1, 0)
        cv.rectangle(self.img, (SA2_CORNERS[0], SA2_CORNERS[1]),
                     (SA2_CORNERS[2], SA2_CORNERS[3]), (0, 0, 0), 1)
        cv.putText(self.img, '2',
                   (SA2_CORNERS[0] + 3, SA2_CORNERS[1] + 15),
                   cv.FONT_HERSHEY_PLAIN, 1, 0)
        cv.rectangle(self.img, (SA3_CORNERS[0], SA3_CORNERS[1]),
                     (SA3_CORNERS[2], SA3_CORNERS[3]), (0, 0, 0), 1)
        cv.putText(self.img, '3',
                   (SA3_CORNERS[0] + 3, SA3_CORNERS[1] + 15),
                   cv.FONT_HERSHEY_PLAIN, 1, 0)

        # Post the last known location of the sailor.
        cv.putText(self.img, '+', (last_known),
                   cv.FONT_HERSHEY_PLAIN, 1, (0, 0, 255))
        cv.putText(self.img, '+ = Last Known Position', (274, 355),
                   cv.FONT_HERSHEY_PLAIN, 1, (0, 0, 255))
        cv.putText(self.img, '* = Actual Position', (275, 370),
                   cv.FONT_HERSHEY_PLAIN, 1, (255, 0, 0))

        cv.imshow('Search Area', self.img)
        cv.moveWindow('Search Area', 750, 10)
        cv.waitKey(500)

    def sailor_final_location(self, num_search_areas):
        """Return the actual x,y location of the missing sailor."""
        # Find sailor coordinates with respect to any Search Area sub-array.
        self.sailor_actual[0] = np.random.choice(self.sa1.shape[1])
        self.sailor_actual[1] = np.random.choice(self.sa1.shape[0])

        # Pick a search area at random.
        area = int(random.triangular(1, num_search_areas + 1))

        # Convert local search area coordinates to map coordinates.
        if area == 1:
            x = self.sailor_actual[0] + SA1_CORNERS[0]
            y = self.sailor_actual[1] + SA1_CORNERS[1]
            self.area_actual = 1
        elif area == 2:
            x = self.sailor_actual[0] + SA2_CORNERS[0]
            y = self.sailor_actual[1] + SA2_CORNERS[1]
            self.area_actual = 2
        elif area == 3:
            x = self.sailor_actual[0] + SA3_CORNERS[0]
            y = self.sailor_actual[1] + SA3_CORNERS[1]
            self.area_actual = 3
        return x, y

    def calc_search_effectiveness(self):
        """Set decimal search effectiveness value per search area."""
        self.sep1 = random.uniform(0.2, 0.9)
        self.sep2 = random.uniform(0.2, 0.9)
        self.sep3 = random.uniform(0.2, 0.9)

    def conduct_search(self, area_num, area_array, effectiveness_prob):
        """Return search results and list of searched coordinates."""
        local_y_range = range(area_array.shape[0])
        local_x_range = range(area_array.shape[1])
        coords = list(itertools.product(local_x_range, local_y_range))
        random.shuffle(coords)
        coords = coords[:int((len(coords) * effectiveness_prob))]
        loc_actual = (self.sailor_actual[0], self.sailor_actual[1])
        if area_num == self.area_actual and loc_actual in coords:
            return 'Found in Area {}.'.format(area_num), coords
        return 'Not Found', coords

    def revise_target_probs(self):
        """Update area target probabilities based on search effectiveness."""
        denom = self.p1 * (1 - self.sep1) + self.p2 * (1 - self.sep2) \
                + self.p3 * (1 - self.sep3)
        self.p1 = self.p1 * (1 - self.sep1) / denom
        self.p2 = self.p2 * (1 - self.sep2) / denom
        self.p3 = self.p3 * (1 - self.sep3) / denom


def draw_menu(search_num):
    """Print menu of choices for conducting area searches."""
    print('\nSearch {}'.format(search_num))
    print(
        """
        Choose next areas to search:

        0 - Quit
        1 - Search Area 1 twice
        2 - Search Area 2 twice
        3 - Search Area 3 twice
        4 - Search Areas 1 & 2
        5 - Search Areas 1 & 3
        6 - Search Areas 2 & 3
        7 - Start Over
        """
        )


def main():
    app = Search('Cape_Python')
    app.draw_map(last_known=(160, 290))
    sailor_x, sailor_y = app.sailor_final_location(num_search_areas=3)
    print("-" * 65)
    print("\nInitial Target (P) Probabilities:")
    print("P1 = {:.3f}, P2 = {:.3f}, P3 = {:.3f}".format(app.p1, app.p2, app.p3))
    search_num = 1

    while True:
        app.calc_search_effectiveness()
        draw_menu(search_num)
        choice = input("Choice: ")

        if choice == "0":
            sys.exit()

        elif choice == "1":
            results_1, coords_1 = app.conduct_search(1, app.sa1, app.sep1)
            results_2, coords_2 = app.conduct_search(1, app.sa1, app.sep1)
            app.sep1 = (len(set(coords_1 + coords_2))) / (len(app.sa1)**2)
            app.sep2 = 0
            app.sep3 = 0

        elif choice == "2":
            results_1, coords_1 = app.conduct_search(2, app.sa2, app.sep2)
            results_2, coords_2 = app.conduct_search(2, app.sa2, app.sep2)
            app.sep1 = 0
            app.sep2 = (len(set(coords_1 + coords_2))) / (len(app.sa2)**2)
            app.sep3 = 0

        elif choice == "3":
            results_1, coords_1 = app.conduct_search(3, app.sa3, app.sep3)
            results_2, coords_2 = app.conduct_search(3, app.sa3, app.sep3)
            app.sep1 = 0
            app.sep2 = 0
            app.sep3 = (len(set(coords_1 + coords_2))) / (len(app.sa3)**2)

        elif choice == "4":
            results_1, coords_1 = app.conduct_search(1, app.sa1, app.sep1)
            results_2, coords_2 = app.conduct_search(2, app.sa2, app.sep2)
            app.sep3 = 0

        elif choice == "5":
            results_1, coords_1 = app.conduct_search(1, app.sa1, app.sep1)
            results_2, coords_2 = app.conduct_search(3, app.sa3, app.sep3)
            app.sep2 = 0

        elif choice == "6":
            results_1, coords_1 = app.conduct_search(2, app.sa2, app.sep2)
            results_2, coords_2 = app.conduct_search(3, app.sa3, app.sep3)
            app.sep1 = 0

        elif choice == "7":
            main()

        else:
            print("\nSorry, but that isn't a valid choice.", file=sys.stderr)
            continue

        app.revise_target_probs()  # Use Bayes' rule to update target probs.

        print("\nSearch {} Results 1 = {}"
              .format(search_num, results_1), file=sys.stderr)
        print("Search {} Results 2 = {}\n"
              .format(search_num, results_2), file=sys.stderr)
        print("Search {} Effectiveness (E):".format(search_num))
        print("E1 = {:.3f}, E2 = {:.3f}, E3 = {:.3f}"
              .format(app.sep1, app.sep2, app.sep3))

        # Print target probabilities if sailor is not found else show position.
        if results_1 == 'Not Found' and results_2 == 'Not Found':
            print("\nNew Target Probabilities (P) for Search {}:"
                  .format(search_num + 1))
            print("P1 = {:.3f}, P2 = {:.3f}, P3 = {:.3f}"
                  .format(app.p1, app.p2, app.p3))
        else:
            cv.circle(app.img, (sailor_x, sailor_y), 3, (255, 0, 0), -1)
            cv.imshow('Search Area', app.img)
            cv.waitKey(1500)
            main()
        search_num += 1


if __name__ == '__main__':
    main()
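One design detail in main() is easy to miss: when an area is searched twice, the two coordinate lists are merged with a set union before recomputing effectiveness, so cells covered by both passes are not double-counted. A standalone toy run (illustrative numbers, not from the book) shows the effect:

# Toy illustration of combining two search passes over a 50x50 area.
import itertools
import random

area_side = 50
coords = list(itertools.product(range(area_side), range(area_side)))

random.shuffle(coords)
pass_1 = coords[:int(len(coords) * 0.3)]  # First pass covers 30%.
random.shuffle(coords)
pass_2 = coords[:int(len(coords) * 0.3)]  # Second pass covers another 30%.

# Union of the two passes; overlap means combined coverage is < 60%.
combined = len(set(pass_1 + pass_2)) / area_side**2
print('Combined effectiveness: {:.3f}'.format(combined))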
Images used: cape_python.png (the map loaded via MAP_FILE).
What Chapter 2 covers: The code in this chapter applies stylometry to authorship attribution. It compares an unknown text (lost.txt) against known works by Doyle (hound.txt) and Wells (war.txt) using punctuation heatmaps, dispersion plots, word-length distributions, stopword and part-of-speech frequencies, a chi-squared vocabulary test, and Jaccard similarity.
The code in Chapter 2:
"""Read a text file and return a list of strings.""" def text_to_string(filename): strings = [] with open(filename) as f: strings.append(f.read()) return '\n'.join(strings)
"""Make a heatmap of punctuation.""" import math from string import punctuation import nltk import numpy as np import matplotlib.pyplot as plt from matplotlib.colors import ListedColormap import seaborn as sns # Install seaborn using: pip install seaborn. PUNCT_SET = set(punctuation) def main(): # Load text files into dictionary by author. strings_by_author = dict() strings_by_author['doyle'] = text_to_string('hound.txt') strings_by_author['wells'] = text_to_string('war.txt') strings_by_author['unknown'] = text_to_string('lost.txt') # Tokenize text strings preserving only punctuation marks. punct_by_author = make_punct_dict(strings_by_author) # Convert punctuation marks to numerical values and plot heatmaps. plt.ion() for author in punct_by_author: heat = convert_punct_to_number(punct_by_author, author) arr = np.array((heat[:6561])) # trim to largest size for square array arr_reshaped = arr.reshape(int(math.sqrt(len(arr))), int(math.sqrt(len(arr)))) fig, ax = plt.subplots(figsize=(7, 7)) sns.heatmap(arr_reshaped, cmap=ListedColormap(['blue', 'yellow']), square=True, ax=ax) ax.set_title('Heatmap Semicolons {}'.format(author)) plt.show() def text_to_string(filename): """Read a text file and return a string.""" with open(filename) as infile: return infile.read() def make_punct_dict(strings_by_author): """Return dictionary of tokenized punctuation by corpus by author.""" punct_by_author = dict() for author in strings_by_author: tokens = nltk.word_tokenize(strings_by_author[author]) punct_by_author[author] = ([token for token in tokens if token in PUNCT_SET]) print("Number punctuation marks in {} = {}" .format(author, len(punct_by_author[author]))) return punct_by_author def convert_punct_to_number(punct_by_author, author): """Return list of punctuation marks converted to numerical values.""" heat_vals = [] for char in punct_by_author[author]: if char == ';': value = 1 else: value = 2 heat_vals.append(value) return heat_vals if __name__ == '__main__': main()
"""Use NLP (nltk) to make dispersion plot.""" import matplotlib.pyplot as plt from nltk.draw.dispersion import dispersion_plot def text_to_string(filename): """Read a text file and return a string.""" with open(filename) as infile: return infile.read() corpus = text_to_string('hound.txt') tokens = nltk.word_tokenize(corpus) tokens = nltk.Text(tokens) # NLTK wrapper for automatic text analysis. words = ['Holmes', 'Watson', 'Mortimer', 'Henry', 'Barrymore', 'Stapleton', 'Selden', 'hound'] ax = dispersion_plot(tokens, words) # Correct current bug in NLTK dispersion_plot that reverses label order by mistake: ax.set_yticks(list(range(len(words))), reversed(words), color="C0")
# NOTE: The stopwords and parts of speech functions
# changed with the 3rd printing of the book.
from collections import Counter
import matplotlib.pyplot as plt
import nltk
from nltk.corpus import stopwords

LINES = ['-', ':', '--']  # Line style for plots.

def main():
    # Load text files into dictionary by author.
    strings_by_author = dict()
    strings_by_author['doyle'] = text_to_string('hound.txt')
    strings_by_author['wells'] = text_to_string('war.txt')
    strings_by_author['unknown'] = text_to_string('lost.txt')

    # Check results of reading files.
    print(strings_by_author['doyle'][:300])

    # Tokenize text strings and run stylometric tests.
    words_by_author = make_word_dict(strings_by_author)
    len_shortest_corpus = find_shortest_corpus(words_by_author)
    word_length_test(words_by_author, len_shortest_corpus)
    stopwords_test(words_by_author, len_shortest_corpus)
    parts_of_speech_test(words_by_author, len_shortest_corpus)
    vocab_test(words_by_author)
    jaccard_test(words_by_author, len_shortest_corpus)

def text_to_string(filename):
    """Read a text file and return a string."""
    with open(filename) as infile:
        return infile.read()

def make_word_dict(strings_by_author):
    """Return dictionary of tokenized words by corpus by author."""
    words_by_author = dict()
    for author in strings_by_author:
        tokens = nltk.word_tokenize(strings_by_author[author])
        words_by_author[author] = ([token.lower() for token in tokens
                                    if token.isalpha()])
    return words_by_author

def find_shortest_corpus(words_by_author):
    """Return length of shortest corpus."""
    word_count = []
    for author in words_by_author:
        word_count.append(len(words_by_author[author]))
        print('\nNumber of words for {} = {}\n'.
              format(author, len(words_by_author[author])))
    len_shortest_corpus = min(word_count)
    print('length shortest corpus = {}\n'.format(len_shortest_corpus))
    return len_shortest_corpus

def word_length_test(words_by_author, len_shortest_corpus):
    """Plot word length freq by author, truncated to shortest corpus length."""
    by_author_length_freq_dist = dict()
    plt.figure(1)
    plt.ion()
    for i, author in enumerate(words_by_author):
        word_lengths = [len(word) for word in words_by_author[author]
                        [:len_shortest_corpus]]
        by_author_length_freq_dist[author] = nltk.FreqDist(word_lengths)
        by_author_length_freq_dist[author].plot(15,
                                                linestyle=LINES[i],
                                                label=author,
                                                title='Word Length')
    plt.legend()
    ##plt.show()  # Uncomment to see plot while coding function.

def stopwords_test(words_by_author, len_shortest_corpus):
    """Plot stopwords freq by author, truncated to shortest corpus length."""
    fdist = dict()
    plt.figure(2)
    stop_words = stopwords.words('english')
    for i, author in enumerate(words_by_author):
        stopwords_by_author = [word for word in words_by_author[author]
                               [:len_shortest_corpus] if word in stop_words]
        fdist[author] = {word: stopwords_by_author.count(word)
                         for word in stop_words[:50]}  # First 50 of 179 stopwords.
        k, v = list(fdist[author].keys()), list(fdist[author].values())
        plt.plot(k, v, label=author, linestyle=LINES[i], lw=1)
    ##plt.xticks([])  # Turn off labels if plotting >50 stopwords.
    plt.title('First 50 Stopwords')
    plt.legend()
    plt.xticks(rotation=90)
    ##plt.show()

def parts_of_speech_test(words_by_author, len_shortest_corpus):
    """Plot author use of parts-of-speech such as nouns, verbs, adverbs, etc."""
    fdist = dict()
    colors = ['k', 'lightgrey', 'grey']
    plt.figure(3)
    for i, author in enumerate(words_by_author):
        pos_by_author = [pos[1] for pos in
                         nltk.pos_tag(words_by_author[author]
                                      [:len_shortest_corpus])]
        fdist[author] = Counter(pos_by_author)
        k, v = list(fdist[author].keys()), list(fdist[author].values())
        plt.plot(k, v, linestyle='', marker='^', c=colors[i], label=author)
    plt.title('Parts of Speech')
    plt.legend()
    plt.xticks(rotation=90)
    ##plt.show()

def vocab_test(words_by_author):
    """Compare author vocabularies using the Chi Squared statistical test."""
    chisquared_by_author = dict()
    for author in words_by_author:
        if author != 'unknown':
            # Combine corpus for author & unknown & find 1000 most-common words.
            combined_corpus = (words_by_author[author] +
                               words_by_author['unknown'])
            author_proportion = (len(words_by_author[author]) /
                                 len(combined_corpus))
            combined_freq_dist = nltk.FreqDist(combined_corpus)
            most_common_words = list(combined_freq_dist.most_common(1000))
            chisquared = 0

            # Calculate observed vs. expected word counts.
            for word, combined_count in most_common_words:
                observed_count_author = words_by_author[author].count(word)
                expected_count_author = combined_count * author_proportion
                chisquared += ((observed_count_author -
                                expected_count_author)**2 /
                               expected_count_author)
            chisquared_by_author[author] = chisquared
            print('Chi-squared for {} = {:.1f}'.format(author, chisquared))

    most_likely_author = min(chisquared_by_author, key=chisquared_by_author.get)
    print('Most-likely author by vocabulary is {}\n'.format(most_likely_author))

def jaccard_test(words_by_author, len_shortest_corpus):
    """Calculate Jaccard similarity of each known corpus to unknown corpus."""
    jaccard_by_author = dict()
    unique_words_unknown = set(words_by_author['unknown']
                               [:len_shortest_corpus])
    authors = (author for author in words_by_author if author != 'unknown')
    for author in authors:
        unique_words_author = set(words_by_author[author][:len_shortest_corpus])
        shared_words = unique_words_author.intersection(unique_words_unknown)
        jaccard_sim = (float(len(shared_words)) /
                       (len(unique_words_author) +
                        len(unique_words_unknown) -
                        len(shared_words)))
        jaccard_by_author[author] = jaccard_sim
        print('Jaccard Similarity for {} = {}'.format(author, jaccard_sim))

    most_likely_author = max(jaccard_by_author, key=jaccard_by_author.get)
    print('Most-likely author by similarity is {}'.format(most_likely_author))

if __name__ == '__main__':
    main()
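As a quick illustration of jaccard_test() above, here is a toy example with invented sentences (not the book's corpora) showing how the similarity coefficient is computed:

# Toy Jaccard similarity: shared unique words / total unique words.
known = set('the curious dog barked at the silent moor'.split())
unknown = set('a silent hound crossed the moor'.split())

shared = known & unknown
jaccard = len(shared) / (len(known) + len(unknown) - len(shared))
print(shared)             # {'the', 'silent', 'moor'}
print(round(jaccard, 3))  # 3 shared / 10 total unique words = 0.3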
Texts used by the code: hound.txt, war.txt, lost.txt.
Images used: none.
What Chapter 3 covers: The code in this chapter produces automatic text summaries. It scrapes speeches from the web with requests and BeautifulSoup, summarizes them either with gensim or with a word-frequency sentence-scoring algorithm, and builds a word cloud from The Hound of the Baskervilles.
The code in Chapter 3:
"""
Gensim 4.0, released March 25, 2021, dropped the Summarization module.
To run this program, install Gensim 3.8.3 (https://pypi.org/project/gensim/3.8.3/).
"""
import requests
import bs4
from gensim.summarization import summarize

url = 'https://jamesclear.com/great-speeches/make-your-bed-by-admiral-william-h-mcraven'
page = requests.get(url)
page.raise_for_status()
soup = bs4.BeautifulSoup(page.text, 'html.parser')
p_elems = [element.text for element in soup.find_all('p')]

speech = ' '.join(p_elems)  # Be sure to join using a space!

print("\nSummary of Make Your Bed speech:")
print(summarize(speech, word_count=225))  # Note: This is an update to the 1st printing.
""" Gensim 4.0, released March 25, 2021, dropped the Summarization module. To run this program install Gensim 3.8.3 (https://pypi.org/project/gensim/3.8.3/) """ from collections import Counter import re import requests import bs4 import nltk from nltk.corpus import stopwords def main(): # Use webscraping to obtain the text. url = 'http://www.analytictech.com/mb021/mlk.htm' page = requests.get(url) page.raise_for_status() soup = bs4.BeautifulSoup(page.text, 'html.parser') p_elems = [element.text for element in soup.find_all('p')] speech = ' '.join(p_elems) # Make sure to join on a space! # Fix typos, remove extra spaces, digits, and punctuation. speech = speech.replace(')mowing', 'knowing') speech = re.sub('\s+', ' ', speech) speech_edit = re.sub('[^a-zA-Z]', ' ', speech) speech_edit = re.sub('\s+', ' ', speech_edit) # Request input. while True: max_words = input("Enter max words per sentence for summary: ") num_sents = input("Enter number of sentences for summary: ") if max_words.isdigit() and num_sents.isdigit(): break else: print("\nInput must be in whole numbers.\n") # Run functions to generate sentence scores. speech_edit_no_stop = remove_stop_words(speech_edit) word_freq = get_word_freq(speech_edit_no_stop) sent_scores = score_sentences(speech, word_freq, max_words) # Print the top-ranked sentences. counts = Counter(sent_scores) summary = counts.most_common(int(num_sents)) print("\nSUMMARY:") for i in summary: print(i[0]) def remove_stop_words(speech_edit): """Remove stop words from string and return string.""" stop_words = set(stopwords.words('english')) speech_edit_no_stop = '' for word in nltk.word_tokenize(speech_edit): if word.lower() not in stop_words: speech_edit_no_stop += word + ' ' return speech_edit_no_stop def get_word_freq(speech_edit_no_stop): """Return a dictionary of word frequency in a string.""" word_freq = nltk.FreqDist(nltk.word_tokenize(speech_edit_no_stop.lower())) return word_freq def score_sentences(speech, word_freq, max_words): """Return dictionary of sentence scores based on word frequency.""" sent_scores = dict() sentences = nltk.sent_tokenize(speech) for sent in sentences: sent_scores[sent] = 0 words = nltk.word_tokenize(sent.lower()) sent_word_count = len(words) if sent_word_count <= int(max_words): for word in words: if word in word_freq.keys(): sent_scores[sent] += word_freq[word] sent_scores[sent] = sent_scores[sent] / sent_word_count return sent_scores if __name__ == '__main__': main()
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS

# Load a text file as a string.
with open('hound.txt') as infile:
    text = infile.read()

# Load an image as a NumPy array.
mask = np.array(Image.open('holmes.png'))

# Get stop words as a set and add extra words.
stopwords = STOPWORDS
stopwords.update(['us', 'one', 'will', 'said', 'now', 'well', 'man', 'may',
                  'little', 'say', 'must', 'way', 'long', 'yet', 'mean',
                  'put', 'seem', 'asked', 'made', 'half', 'much',
                  'certainly', 'might', 'came'])

# Generate word cloud.
wc = WordCloud(max_words=500,
               relative_scaling=0.5,
               mask=mask,
               background_color='white',
               stopwords=stopwords,
               margin=2,
               random_state=7,
               contour_width=2,
               contour_color='brown',
               colormap='copper').generate(text)

# Turn wc object into an array.
colors = wc.to_array()

# Plot and save word cloud.
plt.figure()
plt.title("Chamberlain Hunt Academy Senior Class Presents:\n",
          fontsize=15, color='brown')
plt.text(-10, 0, "The Hound of the Baskervilles",
         fontsize=20, fontweight='bold', color='brown')
plt.suptitle("7:00 pm May 10-12 McComb Auditorium",
             x=0.52, y=0.095, fontsize=15, color='brown')
plt.imshow(colors, interpolation="bilinear")
plt.axis('off')
plt.show()
##plt.savefig('hound_wordcloud.png')
Texts used by the code: hound.txt.
Images used: holmes.png (the word-cloud mask).
What Chapter 4 covers: The code in this chapter implements a book cipher based on the novel The Lost World. Characters (or, in one variant, whole words) of a message are encrypted as shifted indexes into the novel's text, giving one-time-pad-like behavior; a second variant spells out words missing from the book letter by letter, and a helper script charts character frequencies in the code book.
The code in Chapter 4:
"""Book code using the novel The Lost World For words not in book, spell-out with first letter of words. Flag 'first letter mode' by bracketing between alternating 'a a' and 'the the'. credit: Eric T. Mortenson """ import sys import os import random import string from collections import defaultdict, Counter def main(): message = input("Enter plaintext or ciphertext: ") process = input("Enter 'encrypt' or 'decrypt': ") shift = int(input("Shift value (1-365) = ")) infile = input("Enter filename with extension: ") if not os.path.exists(infile): print("File {} not found. Terminating.".format(infile), file=sys.stderr) sys.exit(1) word_list = load_file(infile) word_dict = make_dict(word_list, shift) letter_dict = make_letter_dict(word_list) if process == 'encrypt': ciphertext = encrypt(message, word_dict, letter_dict) count = Counter(ciphertext) encryptedWordList = [] for number in ciphertext: encryptedWordList.append(word_list[number - shift]) print("\nencrypted word list = \n {} \n" .format(' '.join(encryptedWordList))) print("encrypted ciphertext = \n {}\n".format(ciphertext)) # Check the encryption by decrypting the ciphertext. print("decrypted plaintext = ") singleFirstCheck = False for cnt, i in enumerate(ciphertext): if word_list[ciphertext[cnt]-shift] == 'a' and \ word_list[ciphertext[cnt+1]-shift] == 'a': continue if word_list[ciphertext[cnt]-shift] == 'a' and \ word_list[ciphertext[cnt-1]-shift] == 'a': singleFirstCheck = True continue if singleFirstCheck == True and cnt (ciphertext)-1 and \ word_list[ciphertext[cnt]-shift] == 'the' and \ word_list[ciphertext[cnt+1]-shift] == 'the': continue if singleFirstCheck == True and \ word_list[ciphertext[cnt]-shift] == 'the' and \ word_list[ciphertext[cnt-1]-shift] == 'the': singleFirstCheck = False print(' ', end='', flush=True) continue if singleFirstCheck == True: print(word_list[i - shift][0], end = '', flush=True) if singleFirstCheck == False: print(word_list[i - shift], end=' ', flush=True) elif process == 'decrypt': plaintext = decrypt(message, word_list, shift) print("\ndecrypted plaintext = \n {}".format(plaintext)) def load_file(infile): """Read and return text file as a list of lowercase words.""" with open(infile, encoding='utf-8') as file: words = [word.lower() for line in file for word in line.split()] words_no_punct = ["".join(char for char in word if char not in \ string.punctuation) for word in words] return words_no_punct def make_dict(word_list, shift): """Return dictionary of characters as keys and shifted indexes as values.""" word_dict = defaultdict(list) for index, word in enumerate(word_list): word_dict[word].append(index + shift) return word_dict def make_letter_dict(word_list): firstLetterDict = defaultdict(list) for word in word_list: if len(word) > 0: if word[0].isalpha(): firstLetterDict[word[0]].append(word) return firstLetterDict def encrypt(message, word_dict, letter_dict): """Return list of indexes representing characters in a message.""" encrypted = [] # remove punctuation from message words messageWords = message.lower().split() messageWordsNoPunct = ["".join(char for char in word if char not in \ string.punctuation) for word in messageWords] for word in messageWordsNoPunct: if len(word_dict[word]) > 1: index = random.choice(word_dict[word]) elif len(word_dict[word]) == 1: # Random.choice fails if only 1 choice. index = word_dict[word][0] elif len(word_dict[word]) == 0: # Word not in word_dict. 
encrypted.append(random.choice(word_dict['a'])) encrypted.append(random.choice(word_dict['a'])) for letter in word: if letter not in letter_dict.keys(): print('\nLetter {} not in letter-to-word dictionary.' .format(letter), file=sys.stderr) continue if len(letter_dict[letter])>1: newWord =random.choice(letter_dict[letter]) else: newWord = letter_dict[letter][0] if len(word_dict[newWord])>1: index = random.choice(word_dict[newWord]) else: index = word_dict[newWord][0] encrypted.append(index) encrypted.append(random.choice(word_dict['the'])) encrypted.append(random.choice(word_dict['the'])) continue encrypted.append(index) return encrypted def decrypt(message, word_list, shift): """Decrypt ciphertext string and return plaintext word string. This shows how plaintext looks before extracting first letters. """ plaintextList = [] indexes = [s.replace(',', '').replace('[', '').replace(']', '') for s in message.split()] for count, i in enumerate(indexes): plaintextList.append(word_list[int(i) - shift]) return ' '.join(plaintextList) def check_for_fail(ciphertext): """Return True if ciphertext contains any duplicate keys.""" check = [k for k, v in Counter(ciphertext).items() if v > 1] if len(check) > 0: print(check) return True if __name__ == '__main__': main()
"""Plot barchart of characters in text file.""" import sys import os import operator from collections import Counter import matplotlib.pyplot as plt def load_file(infile): """Read and return text file as string of lowercase characters.""" with open(infile) as f: text = f.read().lower() return text def main(): infile = 'lost.txt' if not os.path.exists(infile): print("File {} not found. Terminating.".format(infile), file=sys.stderr) sys.exit(1) text = load_file(infile) # Make bar chart of characters in text and their frequency. char_freq = Counter(text) char_freq_sorted = sorted(char_freq.items(), key=operator.itemgetter(1), reverse=True) x, y = zip(*char_freq_sorted) # * unpacks iterable. fig, ax = plt.subplots() ax.bar(x, y) fig.show() if __name__ == '__main__': main()
import sys
import os
import random
from collections import defaultdict, Counter

def main():
    message = input("Enter plaintext or ciphertext: ")
    process = input("Enter 'encrypt' or 'decrypt': ")
    while process not in ('encrypt', 'decrypt'):
        process = input("Invalid process. Enter 'encrypt' or 'decrypt': ")
    shift = int(input("Shift value (1-366) = "))
    while not 1 <= shift <= 366:
        shift = int(input("Invalid value. Enter digit from 1 to 366: "))
    infile = input("Enter filename with extension: ")
    if not os.path.exists(infile):
        print("File {} not found. Terminating.".format(infile),
              file=sys.stderr)
        sys.exit(1)
    text = load_file(infile)
    char_dict = make_dict(text, shift)

    if process == 'encrypt':
        ciphertext = encrypt(message, char_dict)

        # Run QC protocols and print results.
        if check_for_fail(ciphertext):
            print("\nProblem finding unique keys.", file=sys.stderr)
            print("Try again, change message, or change code book.\n",
                  file=sys.stderr)
            sys.exit()
        print("\nCharacter and number of occurrences in char_dict: \n")
        print("{: >10}{: >10}{: >10}".format('Character', 'Unicode', 'Count'))
        for key in sorted(char_dict.keys()):
            print('{:>10}{:>10}{:>10}'.format(repr(key)[1:-1],
                                              str(ord(key)),
                                              len(char_dict[key])))
        print('\nNumber of distinct characters: {}'.format(len(char_dict)))
        print("Total number of characters: {:,}\n".format(len(text)))
        print("encrypted ciphertext = \n {}\n".format(ciphertext))

        # Check the encryption by decrypting the ciphertext.
        print("decrypted plaintext = ")
        for i in ciphertext:
            print(text[i - shift], end='', flush=True)

    elif process == 'decrypt':
        plaintext = decrypt(message, text, shift)
        print("\ndecrypted plaintext = \n {}".format(plaintext))

def load_file(infile):
    """Read and return text file as a string of lowercase characters."""
    with open(infile) as f:
        loaded_string = f.read().lower()
    return loaded_string

def make_dict(text, shift):
    """Return dictionary of characters as keys and shifted indexes as values."""
    char_dict = defaultdict(list)
    for index, char in enumerate(text):
        char_dict[char].append(index + shift)
    return char_dict

def encrypt(message, char_dict):
    """Return list of indexes representing characters in a message."""
    encrypted = []
    for char in message.lower():
        if len(char_dict[char]) > 1:
            index = random.choice(char_dict[char])
        elif len(char_dict[char]) == 1:  # Random.choice fails if only 1 choice.
            index = char_dict[char][0]
        elif len(char_dict[char]) == 0:
            print("\nCharacter {} not in dictionary.".format(char),
                  file=sys.stderr)
            continue
        encrypted.append(index)
    return encrypted

def decrypt(message, text, shift):
    """Decrypt ciphertext list and return plaintext string."""
    plaintext = ''
    indexes = [s.replace(',', '').replace('[', '').replace(']', '')
               for s in message.split()]
    for i in indexes:
        plaintext += text[int(i) - shift]
    return plaintext

def check_for_fail(ciphertext):
    """Return True if ciphertext contains any duplicate keys."""
    check = [k for k, v in Counter(ciphertext).items() if v > 1]
    if len(check) > 0:
        return True

if __name__ == '__main__':
    main()
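To see the cipher's one-time-pad behavior at a small scale, here is a toy round trip using the same make_dict/encrypt/decrypt logic as the listing above, with a short invented "book" standing in for the novel's text:

# Toy book cipher: indexes into a "book" string stand in for characters.
import random
from collections import defaultdict

book = 'the quick brown fox jumps over the lazy dog'  # Stand-in code book.
shift = 7

char_dict = defaultdict(list)
for index, char in enumerate(book):
    char_dict[char].append(index + shift)  # Same scheme as make_dict().

message = 'hot dog'
ciphertext = [random.choice(char_dict[c]) for c in message]
print(ciphertext)  # Repeat letters usually map to different indexes each run.

plaintext = ''.join(book[i - shift] for i in ciphertext)
print(plaintext)   # 'hot dog'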
Texts used by the code: lost.txt.
Images used: none.
What Chapter 5 covers: The code in this chapter recreates an astronomical blink comparator, the instrument used to discover Pluto. It registers pairs of night-sky images with ORB keypoints and a homography, blinks them to reveal movement, and detects transients by taking the absolute difference between the two exposures.
The code in Chapter 5:
import os
from pathlib import Path
import numpy as np
import cv2 as cv

MIN_NUM_KEYPOINT_MATCHES = 50

def main():
    """Loop through 2 folders with paired images, register and blink images."""
    night1_files = sorted(os.listdir('night_1'))
    night2_files = sorted(os.listdir('night_2'))
    path1 = Path.cwd() / 'night_1'
    path2 = Path.cwd() / 'night_2'
    path3 = Path.cwd() / 'night_1_registered'

    for i, _ in enumerate(night1_files):
        img1 = cv.imread(str(path1 / night1_files[i]), cv.IMREAD_GRAYSCALE)
        img2 = cv.imread(str(path2 / night2_files[i]), cv.IMREAD_GRAYSCALE)
        print("Comparing {} to {}.\n".format(night1_files[i], night2_files[i]))

        # Find keypoints and best matches between them.
        kp1, kp2, best_matches = find_best_matches(img1, img2)
        img_match = cv.drawMatches(img1, kp1, img2, kp2,
                                   best_matches, outImg=None)

        # Draw a line between the two images.
        height, width = img1.shape
        cv.line(img_match, (width, 0), (width, height), (255, 255, 255), 1)
        QC_best_matches(img_match)  # Comment-out to ignore.

        # Register left-hand image using keypoints.
        img1_registered = register_image(img1, img2, kp1, kp2, best_matches)

        # QC registration and save registered image (Optional steps):
        blink(img1, img1_registered, 'Check Registration', num_loops=5)
        out_filename = '{}_registered.png'.format(night1_files[i][:-4])
        cv.imwrite(str(path3 / out_filename), img1_registered)  # Will overwrite!
        cv.destroyAllWindows()

        # Run the blink comparator.
        blink(img1_registered, img2, 'Blink Comparator', num_loops=15)

def find_best_matches(img1, img2):
    """Return list of keypoints and list of best matches for two images."""
    orb = cv.ORB_create(nfeatures=100)  # Initiate ORB object.

    # Find the keypoints and descriptors with ORB.
    kp1, desc1 = orb.detectAndCompute(img1, mask=None)
    kp2, desc2 = orb.detectAndCompute(img2, mask=None)

    # Find keypoint matches using Brute Force Matcher.
    bf = cv.BFMatcher(cv.NORM_HAMMING, crossCheck=True)
    matches = bf.match(desc1, desc2)

    # Sort matches in ascending order of distance and keep best n matches.
    matches = sorted(matches, key=lambda x: x.distance)
    best_matches = matches[:MIN_NUM_KEYPOINT_MATCHES]

    return kp1, kp2, best_matches

def QC_best_matches(img_match):
    """Draw best keypoint matches connected by colored lines."""
    cv.imshow('Best {} Matches'.format(MIN_NUM_KEYPOINT_MATCHES), img_match)
    cv.waitKey(2500)  # Keeps window active 2.5 seconds.

def register_image(img1, img2, kp1, kp2, best_matches):
    """Return first image registered to second image."""
    if len(best_matches) >= MIN_NUM_KEYPOINT_MATCHES:
        src_pts = np.zeros((len(best_matches), 2), dtype=np.float32)
        dst_pts = np.zeros((len(best_matches), 2), dtype=np.float32)

        for i, match in enumerate(best_matches):
            src_pts[i, :] = kp1[match.queryIdx].pt
            dst_pts[i, :] = kp2[match.trainIdx].pt

        h_array, mask = cv.findHomography(src_pts, dst_pts, cv.RANSAC)
        height, width = img2.shape  # Get dimensions of image 2.
        img1_warped = cv.warpPerspective(img1, h_array, (width, height))
        return img1_warped
    else:
        print("WARNING: Number of keypoint matches < {}\n"
              .format(MIN_NUM_KEYPOINT_MATCHES))
        return img1

def blink(image_1, image_2, window_name, num_loops):
    """Replicate blink comparator with two images."""
    for _ in range(num_loops):
        cv.imshow(window_name, image_1)
        cv.waitKey(330)
        cv.imshow(window_name, image_2)
        cv.waitKey(330)

if __name__ == '__main__':
    main()
import os
from pathlib import Path
import cv2 as cv

PAD = 5  # Ignore pixels this distance from edge.

def find_transient(image, diff_image, pad):
    """Takes image, difference image, and pad value in pixels and returns
    boolean and location of maxVal in difference image excluding an edge rind.
    Draws circle around maxVal on image."""
    transient = False
    height, width = diff_image.shape
    cv.rectangle(image, (PAD, PAD), (width - PAD, height - PAD), 255, 1)
    minVal, maxVal, minLoc, maxLoc = cv.minMaxLoc(diff_image)
    if pad < maxLoc[0] < width - pad and pad < maxLoc[1] < height - pad:
        cv.circle(image, maxLoc, 10, 255, 0)
        transient = True
    return transient, maxLoc

def main():
    night1_files = sorted(os.listdir('night_1_registered_transients'))
    night2_files = sorted(os.listdir('night_2'))
    path1 = Path.cwd() / 'night_1_registered_transients'
    path2 = Path.cwd() / 'night_2'
    path3 = Path.cwd() / 'night_1_2_transients'

    # Images should all be the same size and similar exposures.
    for i, _ in enumerate(night1_files[:-1]):  # Leave off negative image.
        img1 = cv.imread(str(path1 / night1_files[i]), cv.IMREAD_GRAYSCALE)
        img2 = cv.imread(str(path2 / night2_files[i]), cv.IMREAD_GRAYSCALE)

        # Get absolute difference between images.
        diff_imgs1_2 = cv.absdiff(img1, img2)
        cv.imshow('Difference', diff_imgs1_2)
        cv.waitKey(2000)

        # Copy difference image and find and circle brightest pixel.
        temp = diff_imgs1_2.copy()
        transient1, transient_loc1 = find_transient(img1, temp, PAD)

        # Draw black circle on temporary image to obliterate brightest spot.
        cv.circle(temp, transient_loc1, 10, 0, -1)

        # Get location of new brightest pixel and circle it on input image.
        transient2, transient_loc2 = find_transient(img1, temp, PAD)

        if transient1 or transient2:
            print('\nTRANSIENT DETECTED between {} and {}\n'
                  .format(night1_files[i], night2_files[i]))
            font = cv.FONT_HERSHEY_COMPLEX_SMALL
            cv.putText(img1, night1_files[i], (10, 25), font, 1,
                       (255, 255, 255), 1, cv.LINE_AA)
            cv.putText(img1, night2_files[i], (10, 55), font, 1,
                       (255, 255, 255), 1, cv.LINE_AA)
            if transient1 and transient2:
                cv.line(img1, transient_loc1, transient_loc2,
                        (255, 255, 255), 1, lineType=cv.LINE_AA)
            blended = cv.addWeighted(img1, 1, diff_imgs1_2, 1, 0)
            cv.imshow('Surveyed', blended)
            cv.waitKey(2500)  # Keeps window open 2.5 seconds.
            out_filename = '{}_DECTECTED.png'.format(night1_files[i][:-4])
            cv.imwrite(str(path3 / out_filename), blended)  # Will overwrite!
        else:
            print('\nNo transient detected between {} and {}\n'
                  .format(night1_files[i], night2_files[i]))

if __name__ == '__main__':
    main()
import os
from pathlib import Path
import cv2 as cv

PAD = 5  # Ignore pixels this distance from edge.

def find_transient(image, diff_image, pad):
    """Finds and draws circle around transients moving against a star field."""
    transient = False
    height, width = diff_image.shape
    cv.rectangle(image, (PAD, PAD), (width - PAD, height - PAD), 255, 1)
    minVal, maxVal, minLoc, maxLoc = cv.minMaxLoc(diff_image)
    if pad < maxLoc[0] < width - pad and pad < maxLoc[1] < height - pad:
        cv.circle(image, maxLoc, 10, 255, 0)
        transient = True
    return transient, maxLoc

def main():
    night1_files = sorted(os.listdir('night_1_registered_transients'))
    night2_files = sorted(os.listdir('night_2'))
    path1 = Path.cwd() / 'night_1_registered_transients'
    path2 = Path.cwd() / 'night_2'
    path3 = Path.cwd() / 'night_1_2_transients'

    # Images should all be the same size and similar exposures.
    for i, _ in enumerate(night1_files[:-1]):  # Leave off negative image.
        img1 = cv.imread(str(path1 / night1_files[i]), cv.IMREAD_GRAYSCALE)
        img2 = cv.imread(str(path2 / night2_files[i]), cv.IMREAD_GRAYSCALE)

        # Get absolute difference between images.
        diff_imgs1_2 = cv.absdiff(img1, img2)
        cv.imshow('Difference', diff_imgs1_2)
        cv.waitKey(2000)

        # Copy difference image and find and circle brightest pixel.
        temp = diff_imgs1_2.copy()
        transient1, transient_loc1 = find_transient(img1, temp, PAD)

        # Draw black circle on temporary image to obliterate brightest spot.
        cv.circle(temp, transient_loc1, 10, 0, -1)

        # Get location of new brightest pixel and circle it on input image.
        transient2, _ = find_transient(img1, temp, PAD)

        if transient1 or transient2:
            print('\nTRANSIENT DETECTED between {} and {}\n'
                  .format(night1_files[i], night2_files[i]))
            font = cv.FONT_HERSHEY_COMPLEX_SMALL
            cv.putText(img1, night1_files[i], (10, 25), font, 1,
                       (255, 255, 255), 1, cv.LINE_AA)
            cv.putText(img1, night2_files[i], (10, 55), font, 1,
                       (255, 255, 255), 1, cv.LINE_AA)
            blended = cv.addWeighted(img1, 1, diff_imgs1_2, 1, 0)
            cv.imshow('Surveyed', blended)
            cv.waitKey(2500)
            out_filename = '{}_DECTECTED.png'.format(night1_files[i][:-4])
            cv.imwrite(str(path3 / out_filename), blended)  # Will overwrite!
        else:
            print('\nNo transient detected between {} and {}\n'
                  .format(night1_files[i], night2_files[i]))

if __name__ == '__main__':
    main()
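The detection trick shared by both listings is cv.absdiff() followed by cv.minMaxLoc(): anything present in only one exposure survives the subtraction as a bright residual. A minimal synthetic demo (generated arrays standing in for the night_1/night_2 folders) confirms this:

# Synthetic demo of transient detection via image differencing.
import numpy as np
import cv2 as cv

night1 = np.zeros((100, 100), dtype=np.uint8)
night2 = night1.copy()
cv.circle(night1, (30, 30), 2, 255, -1)  # A "star" present in both images.
cv.circle(night2, (30, 30), 2, 255, -1)
cv.circle(night2, (70, 60), 2, 200, -1)  # Transient: only in second image.

diff = cv.absdiff(night1, night2)        # The static star cancels out.
minVal, maxVal, minLoc, maxLoc = cv.minMaxLoc(diff)
print('Brightest residual {} at {}'.format(maxVal, maxLoc))  # Near (70, 60).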